In [42]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
In [43]:
# Load the weather-event records from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="weatherdata20XX.csv")
In [44]:
# Preview the first five rows to check column names and value formats.
df.head(n=5)
Out[44]:
| EventId | Type | Severity | StartTime(UTC) | EndTime(UTC) | Precipitation(in) | TimeZone | AirportCode | LocationLat | LocationLng | City | County | State | ZipCode | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | W-1 | Snow | Light | 2016-01-06 23:14:00 | 2016-01-07 00:34:00 | 0.00 | US/Mountain | K04V | 38.0972 | -106.1689 | Saguache | Saguache | CO | 81149.0 |
| 1 | W-2 | Snow | Light | 2016-01-07 04:14:00 | 2016-01-07 04:54:00 | 0.00 | US/Mountain | K04V | 38.0972 | -106.1689 | Saguache | Saguache | CO | 81149.0 |
| 2 | W-3 | Snow | Light | 2016-01-07 05:54:00 | 2016-01-07 15:34:00 | 0.03 | US/Mountain | K04V | 38.0972 | -106.1689 | Saguache | Saguache | CO | 81149.0 |
| 3 | W-4 | Snow | Light | 2016-01-08 05:34:00 | 2016-01-08 05:54:00 | 0.00 | US/Mountain | K04V | 38.0972 | -106.1689 | Saguache | Saguache | CO | 81149.0 |
| 4 | W-5 | Snow | Light | 2016-01-08 13:54:00 | 2016-01-08 15:54:00 | 0.00 | US/Mountain | K04V | 38.0972 | -106.1689 | Saguache | Saguache | CO | 81149.0 |
In [45]:
# Tally the missing values in each column.
df.isna().sum()
Out[45]:
EventId 0 Type 0 Severity 0 StartTime(UTC) 0 EndTime(UTC) 0 Precipitation(in) 0 TimeZone 0 AirportCode 0 LocationLat 0 LocationLng 0 City 16912 County 0 State 0 ZipCode 69199 dtype: int64
In [46]:
# Remove the ZipCode column, then discard every row that still contains a
# null in any remaining column. Both steps mutate df in place.
# NOTE: this cell is not re-runnable as written; a second run raises KeyError
# because ZipCode has already been removed.
df.drop(columns=['ZipCode'], inplace=True)
df.dropna(axis=0, how='any', inplace=True)
# Re-check the per-column null counts after cleaning.
df.isna().sum()
Out[46]:
EventId 0 Type 0 Severity 0 StartTime(UTC) 0 EndTime(UTC) 0 Precipitation(in) 0 TimeZone 0 AirportCode 0 LocationLat 0 LocationLng 0 City 0 County 0 State 0 dtype: int64
In [47]:
# Build a single "City, County, State" label for every event.
df['Location'] = df['City'].str.cat([df['County'], df['State']], sep=', ')
# Number of events per location, most frequent first.
df['Location'].value_counts()
Out[47]:
Location
South Beach, Lincoln, OR 22098
Tampa, Hillsborough, FL 21423
Tillamook, Tillamook, OR 20154
Jacksonville, Duval, FL 19284
Panama City, Bay, FL 18599
...
Campo, San Diego, CA 43
Hanksville, Wayne, UT 37
Colville, Stevens, WA 33
South Padre Island, Cameron, TX 8
Richmond, Madison, KY 1
Name: count, Length: 1949, dtype: int64
In [48]:
# City and County are already captured in Location, so remove the originals
# (in place).
df.drop(columns=['City', 'County'], inplace=True)
In [49]:
# Frequency of each severity level across all events, largest first.
df['Severity'].value_counts(sort=True, ascending=False)
Out[49]:
Severity Light 5140348 Severe 1724357 Moderate 1366707 Heavy 218900 UNK 157036 Other 2921 Name: count, dtype: int64
In [50]:
# Frequency of each event type across all events, largest first.
df['Type'].value_counts(sort=True, ascending=False)
Out[50]:
Type Rain 4992615 Fog 2009035 Snow 1156334 Cold 231232 Precipitation 157036 Storm 61096 Hail 2921 Name: count, dtype: int64
In [51]:
# Severity breakdown restricted to events whose Type is 'Storm'.
df.loc[df['Type'] == 'Storm', 'Severity'].value_counts()
Out[51]:
Severity Severe 61096 Name: count, dtype: int64
In [52]:
# Keep the 'Storm' events in their own frame and show their severity
# breakdown.
storms = df.loc[df['Type'].eq('Storm')]
storms['Severity'].value_counts()
Out[52]:
Severity Severe 61096 Name: count, dtype: int64
In [53]:
# Map of storm locations.
#
# The map is centred on the mean latitude/longitude of all storm events.
# One marker per event row would stack identical circles on top of each other
# wherever several events share coordinates, bloating the rendered HTML
# without showing any additional place. Draw one marker per distinct
# (lat, lng) pair instead.
storm_sites = storms[['LocationLat', 'LocationLng']].drop_duplicates()

m = folium.Map(
    location=[storms['LocationLat'].mean(), storms['LocationLng'].mean()],
    zoom_start=4,
)

# itertuples(name=None) yields plain (lat, lng) tuples and avoids building a
# Series per row the way iterrows() does.
for lat, lng in storm_sites.itertuples(index=False, name=None):
    folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        color='red',
        fill=True,
        fill_color='red',
    ).add_to(m)

# Last expression of the cell: renders the map inline.
m
Out[53]:
Make this Notebook Trusted to load map: File -> Trust Notebook